{
  "cells": [
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "%matplotlib inline"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "\n# Illustration of the definition of a Tomek link\n\nThis example illustrates what a Tomek link is.\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "# Authors: Guillaume Lemaitre <g.lemaitre58@gmail.com>\n# License: MIT"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "print(__doc__)\n\nimport matplotlib.pyplot as plt\nimport seaborn as sns\n\nsns.set_context(\"poster\")"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
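        "The following helper function applies a common style to each plot: it despines the axes, fixes the axis limits, and adds the axis labels and the legend.\n\n"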
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "def make_plot_despine(ax):\n    sns.despine(ax=ax, offset=10)\n    ax.set_xlim([0, 3])\n    ax.set_ylim([0, 3])\n    ax.set_xlabel(r\"$X_1$\")\n    ax.set_ylabel(r\"$X_2$\")\n    ax.legend(loc=\"lower right\")"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "We will generate some toy data to illustrate how\n`imblearn.under_sampling.TomekLinks` is used to clean a dataset.\n\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "import numpy as np\n\nrng = np.random.RandomState(18)\n\nX_minority = np.transpose(\n    [[1.1, 1.3, 1.15, 0.8, 0.55, 2.1], [1.0, 1.5, 1.7, 2.5, 0.55, 1.9]]\n)\nX_majority = np.transpose(\n    [\n        [2.1, 2.12, 2.13, 2.14, 2.2, 2.3, 2.5, 2.45],\n        [1.5, 2.1, 2.7, 0.9, 1.0, 1.4, 2.4, 2.9],\n    ]\n)"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "In the figure below, the samples highlighted in green form a Tomek link since\nthey are of different classes and are nearest neighbors of each other.\n\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "fig, ax = plt.subplots(figsize=(8, 8))\nax.scatter(\n    X_minority[:, 0],\n    X_minority[:, 1],\n    label=\"Minority class\",\n    s=200,\n    marker=\"_\",\n)\nax.scatter(\n    X_majority[:, 0],\n    X_majority[:, 1],\n    label=\"Majority class\",\n    s=200,\n    marker=\"+\",\n)\n\n# highlight the samples of interest\nax.scatter(\n    [X_minority[-1, 0], X_majority[1, 0]],\n    [X_minority[-1, 1], X_majority[1, 1]],\n    label=\"Tomek link\",\n    s=200,\n    alpha=0.3,\n)\nmake_plot_despine(ax)\nfig.suptitle(\"Illustration of a Tomek link\")\nfig.tight_layout()"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "We can run the `imblearn.under_sampling.TomekLinks` sampler to\nremove the corresponding samples. If `sampling_strategy='auto'`, only the\nsample from the majority class will be removed. If `sampling_strategy='all'`,\nboth samples will be removed.\n\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "from imblearn.under_sampling import TomekLinks\n\nfig, axs = plt.subplots(nrows=1, ncols=2, figsize=(16, 8))\n\nsamplers = {\n    \"Removing only majority samples\": TomekLinks(sampling_strategy=\"auto\"),\n    \"Removing all samples\": TomekLinks(sampling_strategy=\"all\"),\n}\n\nfor ax, (title, sampler) in zip(axs, samplers.items()):\n    X_res, y_res = sampler.fit_resample(\n        np.vstack((X_minority, X_majority)),\n        np.array([0] * X_minority.shape[0] + [1] * X_majority.shape[0]),\n    )\n    ax.scatter(\n        X_res[y_res == 0][:, 0],\n        X_res[y_res == 0][:, 1],\n        label=\"Minority class\",\n        s=200,\n        marker=\"_\",\n    )\n    ax.scatter(\n        X_res[y_res == 1][:, 0],\n        X_res[y_res == 1][:, 1],\n        label=\"Majority class\",\n        s=200,\n        marker=\"+\",\n    )\n\n    # highlight the samples of interest\n    ax.scatter(\n        [X_minority[-1, 0], X_majority[1, 0]],\n        [X_minority[-1, 1], X_majority[1, 1]],\n        label=\"Tomek link\",\n        s=200,\n        alpha=0.3,\n    )\n\n    ax.set_title(title)\n    make_plot_despine(ax)\nfig.tight_layout()\n\nplt.show()"
      ]
    }
  ],
  "metadata": {
    "kernelspec": {
      "display_name": "Python 3",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.10.4"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}